* Entry point set-up: store the project root passed as the first positional
* argument, load the shared configuration relative to it, and open the named
* log "dat" for this run.
global root_dir = "`1'"

include "$root_dir/code/config/config.do"


* log_dir is expected to be set by the configuration above -- TODO confirm.
* The path is quoted, like the include path, so that a directory name with
* embedded spaces still resolves. cap noi shows a failure to open the log
* without stopping the script.
cap noi log using "${log_dir}/make_final_dataset.log", replace name(dat)

* Optional positional arguments 2 to 5. A caller marks an omitted argument with
* the placeholder ___EMPTY___, which is translated to an empty string here.
global arg1 = cond("`2'" != "___EMPTY___", "`2'", "")
global arg2 = cond("`3'" != "___EMPTY___", "`3'", "")
global arg3 = cond("`4'" != "___EMPTY___", "`4'", "")
global arg4 = cond("`5'" != "___EMPTY___", "`5'", "")

* A non-empty argument overrides the matching global; an empty one leaves the
* global at whatever value it already had.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* The weight type is always echoed, whether or not it was overridden
display "${wtype}"

capture noi {

* Run the labeling helper do-file quietly. The labelingvarsfinal command that is
* called further down is presumably defined there -- TODO confirm.
qui do ${code_dir}/config/labeling_finalvars.do

/* This do-file merges all dependent and independent variables and makes the final transformations (taking logs and averages). */

***********************************************************************************************************************
* final dataset.do: Build the final regression datasets
***********************************************************************************************************************

************************************
*** FINAL DATASET
************************************
*standard version
if "${weight_category}" == "" {

* Start from the firm-year dependent variables and merge in the firm-year list
* for the selected weight window and weight type. unmatched(none) is passed so
* that unmatched observations from either side are not kept.
use ${dataset_dir}/dep_vars/bvd_year_depvars, clear
mmerge BvD year using ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, unmatched(none)

* Attach the independent-variable files one after another, in a fixed order.
* unmatched(master) keeps master observations that have no match in the using file.
foreach stem in ///
    country_multinational ///
    gdpcap_manuf_sharesgdpweighted ///
    gdpcap_totind_sharesgdpweighted ///
    devgdp_sharesgdpweighted ///
    wages_vaemp_manuf_sharesgdpweighted ///
    minwage_manuf_sharesgdpweighted ///
    wages_vaemp_totind_sharesgdpweighted ///
    manufshare_sharesgdpweighted ///
    manufVA_sharesgdpweighted ///
    mVAls95_sharesgdpweighted {
    mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_`stem'${weight_window}_${wtype}.dta, unmatched(master)
}

* The lintr file is merged only for the _from1970 window combined with the
* tfacit1 weight type. The condition uses the single & operator, as the other
* conditions in this file do.
if "${weight_window}" == "_from1970" & "${wtype}" == "tfacit1" {
    mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_lintr_manuf_sharesgdpweighted${weight_window}_${wtype}.dta, unmatched(master)
}

* Industry per firm; observations found only in the master get industry "NA"
mmerge BvD using ${final_dir}/BvD_industry.dta, unmatched(master)
replace industry = "NA" if _m == 1
drop _m

* Make the missing-weights indicators correct also for years without wages:
* each flag is replaced by its maximum within the firm (BvD).
foreach flag in missing_weights_1995 missing_weights_minw_1995 {
    bys BvD : egen mw = max(`flag')
    replace `flag' = mw
    drop mw
}
* Same step for the lintr flag; every command is captured so that a failure
* here (for instance when the variable is absent) does not stop the run.
cap bys BvD : egen mw = max(missing_weights_lintr_1995)
cap replace missing_weights_lintr_1995 = mw
cap drop mw

* Logarithms
* Main wage variables across the different deflators, based on patent weighted
* values. For every wage/deflator pair the m and t variants are logged and the
* existing ln variable is renamed to the short _1995_a form.
foreach wage in lsw msw hsw {
    foreach defl in LP DP MP MG {
        * msw is only handled for the MP deflator
        if ("`wage'" != "msw" | "`defl'" == "MP") {
            foreach sfx in m t {
                gen `wage'`defl'`sfx'_1995_a = ln(`wage'`defl'`sfx'_ALL_1995_wtd )
                ren ln`wage'`defl'`sfx'_ALL_1995_wtd ln`wage'`defl'`sfx'_1995_a
            }
        }
    }
}
*
* Logs of the _mtd values, MP deflator only; results get the x marker
foreach stem in lsw hsw minw vaemp gdppc {
    gen `stem'MPx_1995_a = ln(`stem'MPm_ALL_1995_mtd )
    ren ln`stem'MPm_ALL_1995_mtd ln`stem'MPx_1995_a
}
* Only for our main deflator across iterated values; results get the y marker.
* Both commands are captured, so a failure for one stem does not stop the run.
foreach stem in lsw hsw lintr vaemp gdppc {
    cap gen `stem'MPy_1995_a = ln(`stem'MPm_ALL_1995_itd )
    cap ren ln`stem'MPm_ALL_1995_itd ln`stem'MPy_1995_a
}
* GDP per capita and value added per employee with different deflators:
* log the m and t variants and rename the existing ln variables.
foreach defl in DP LP MP MG {
    foreach stem in gdppc vaemp {
        foreach sfx in m t {
            gen `stem'`defl'`sfx'_1995_a = ln(`stem'`defl'`sfx'_ALL_1995_wtd )
            ren ln`stem'`defl'`sfx'_ALL_1995_wtd ln`stem'`defl'`sfx'_1995_a
        }
    }
}
* Logs of the mVA and mVAls variables. Their names carry no deflator code, so
* the renames use the plain names. Earlier versions spliced the deflator loop
* macro d into these names; that macro no longer exists once the loops above
* have finished, so it expanded to nothing and the names were already the same.
foreach v in mVA mVAls {
    gen `v'm_1995_a = ln(`v'm_ALL_1995_wtd )
    ren ln`v'm_ALL_1995_wtd ln`v'm_1995_a
}


* Averages. From here on the foreign and home shares are defined as well; their
* normalization (shr4) follows further below.
rename lngdpgap_ALL_1995_wtd lngdpgap_1995_a
rename msharem_ALL_1995_wtd msharem_1995_a
rename lngdpgap_ALL_1995_mtd lngdpgapx_1995_a
* The _itd version is renamed only if the rename succeeds
capture rename lngdpgap_ALL_1995_itd lngdpgapy_1995_a
* Logs of the home/foreign share variables across the main deflators.
* gen is captured, so combinations that cannot be generated are skipped.
local parts shr_home shr_foreign shr2_home shr2_foreign
foreach stem in lsw msw hsw minw vaemp {
    foreach sfx in m t {
        foreach defl in LP DP MP MG {
            * msw is only handled for the MP deflator
            if ("`stem'" != "msw" | "`defl'" == "MP") {
                foreach part of local parts {
                    cap gen `stem'`defl'`sfx'_`part'_1995_a = ln(`stem'`defl'`sfx'_`part'_1995_wtd)
                }
            }
        }
    }
}
* Foreign and home shares from the _mtd values (x marker)
local parts shr_home shr_foreign shr2_home shr2_foreign
foreach stem in lsw hsw minw vaemp gdppc {
    foreach part of local parts {
        gen `stem'MPx_`part'_1995_a = ln(`stem'MPm_`part'_1995_mtd)
    }
}

* Same for the _itd values (y marker); captured so a failing gen is skipped
foreach stem in lsw hsw lintr vaemp gdppc {
    foreach part of local parts {
        cap gen `stem'MPy_`part'_1995_a = ln(`stem'MPm_`part'_1995_itd)
    }
}

* Logs of the gdppc home/foreign shares for both variants and every deflator
local parts shr_home shr_foreign shr2_home shr2_foreign
foreach sfx in m t {
    foreach defl in DP LP MP MG {
        foreach part of local parts {
            gen gdppc`defl'`sfx'_`part'_1995_a = ln(gdppc`defl'`sfx'_`part'_1995_wtd)
        }
    }
}
* lngdpgap shares are only renamed: _wtd keeps the plain name, _mtd gets the x
* marker and _itd (captured) the y marker.
foreach part of local parts {
    ren lngdpgap_`part'_1995_wtd lngdpgap_`part'_1995_a
    ren lngdpgap_`part'_1995_mtd lngdpgapx_`part'_1995_a
    cap ren lngdpgap_`part'_1995_itd lngdpgapy_`part'_1995_a
}
* Logs of the mVA shares (long home/foreign suffixes); gen is captured
foreach part in shr_home shr_foreign shr2_home shr2_foreign {
    cap gen mVAm_`part'_1995_a = ln(mVAm_`part'_1995_wtd)
}
* Logs of the mVAls shares, which use the short h/fg suffixes; gen is captured
foreach part in shr_h shr_fg shr2_h shr2_fg {
    cap gen mVAlsm_`part'_1995_a = ln(mVAlsm_`part'_1995_wtd)
}


* Normalize the shares: for every variable ending in _shr_home_1995_a build the
* shr4 home and foreign versions. The stem is the part of the name before the
* first underscore. The ratio of the exponentiated shr2 variable to the
* exponentiated total, times maxweight_1995 (home) or one minus it (foreign),
* is taken in year 1995, spread over the group as a maximum, and multiplied
* with the shr2 variable.
* NOTE(review): groups are formed by lse_id here, while other by-group steps in
* this file use BvD -- confirm lse_id is the intended identifier.
* NOTE(review): only the exact stem lngdpgap is skipped; lngdpgapx (and
* lngdpgapy when present) also match the pattern and are processed -- confirm.
qui ds *_shr_home_1995_a
local home_vars `r(varlist)'
foreach hv of local home_vars {
    local stem = substr("`hv'", 1, strpos("`hv'", "_") - 1)
    if "`stem'" != "lngdpgap" {
        gen _term_home = exp(`stem'_shr2_home_1995_a)/exp(`stem'_1995_a)*maxweight_1995 if year==1995
        gen _term_foreign = exp(`stem'_shr2_foreign_1995_a)/exp(`stem'_1995_a)*(1-maxweight_1995) if year==1995
        bys lse_id : egen term_home_fixed = max(_term_home)
        bys lse_id : egen term_foreign_fixed = max(_term_foreign)
        gen `stem'_shr4_home_1995_a = `stem'_shr2_home_1995_a*term_home_fixed
        gen `stem'_shr4_foreign_1995_a = `stem'_shr2_foreign_1995_a*term_foreign_fixed
        * remove the helper variables before the next stem
        drop term_* _term_*
    }
}


* Same normalization for the variables with the short h/fg suffixes. Only the
* foreign (fg) shr4 version is built in this step.
* NOTE(review): groups are formed by lse_id -- confirm against the BvD grouping
* used elsewhere in this file.
qui ds *_shr_h_1995_a
local short_vars `r(varlist)'
foreach sv of local short_vars {
    local stem = substr("`sv'", 1, strpos("`sv'", "_") - 1)
    if "`stem'" != "lngdpgap" {
        gen _term_foreign = exp(`stem'_shr2_fg_1995_a)/exp(`stem'_1995_a)*(1-maxweight_1995) if year==1995
        bys lse_id : egen term_foreign_fixed = max(_term_foreign)
        gen `stem'_shr4_fg_1995_a = `stem'_shr2_fg_1995_a*term_foreign_fixed
        * remove the helper variables before the next stem
        drop term_* _term_*
    }
}

* The raw weighted inputs are no longer needed once the _a variables exist.
* The _itd drop is captured because it may fail.
drop *_wtd *_mtd
cap drop *_itd

* Merging in spillovers
* The essentials
local spill_core auto80_bia auto90_bia auto95_bia pauto90_bia pauto95_bia pauto90_rm6_bia tfa_bia
foreach sp of local spill_core {
    mmerge BvD year using ${dataset_dir}/spillovers/bvd_year_spillovers_`sp', unmatched(master)
}

* The subcomponents
local spill_parts autm80_bia autm90_bia autm95_bia CNC80_bia CNC90_bia CNC95_bia robo80_bia robo90_bia robo95_bia autoX95_bia autonol95_bia
foreach sp of local spill_parts {
    mmerge BvD year using ${dataset_dir}/spillovers/bvd_year_spillovers_`sp'.dta, unmatched(master)
}




* Labeling: labelingvarsfinal is called once per variable family
foreach family in lsw hsw msw vaemp gdppc lngdpgap {
    labelingvarsfinal "`family'"
}

* Drop the merge indicator left by the last merge, then store the dataset for
* this weight window and weight type.
drop _m
compress
save ${final_dir}/regression_dataset${weight_window}_${wtype}.dta, replace

}

***********************************************************************************************************************
* Build the final regression dataset for the alternative weights
***********************************************************************************************************************

* This subscript creates the final variables and merges the alternative weights for the excluding table
* TODO: the _h* variables can probably be deleted in here; they do not appear to be needed.

if "${weight_category}" != "" {

*****************************************************
* Combine the different weights subtypes per category
*****************************************************

* Copy the globals into locals and derive a short indicator (the first four
* characters of the weight category) to include in variable names.
local weight_category "${weight_category}"
local weight_versions "${weight_versions}"
local wc = substr("`weight_category'", 1, 4)


*************************************************
* Append weight subtypes and transform variables
*************************************************

* Note: "h" stands for "home" and "fg" for "foreign" here, because of Stata's
* limit on the length of variable names.
use ${dataset_dir}/dep_vars/bvd_year_list_from1970_${wtype}.dta, clear


* Weight versions are countries that are excluded one by one. For each version
* the weight file is merged in, its variables are logged or renamed to the _a
* form and, for the excluding category, the shr4 variables are built.
foreach wt of local weight_versions {
    * Suffix shared by all variables of this weight version
    local tag `wc'`wt'

    * Merge weight vars (from make_indep_vars.do) into one dataset
    mmerge BvD year using ${dataset_dir}/indep_vars/bvd_year_indepvars_sharesgdpweighted_`weight_category'_`wt'_${wtype}.dta

    * Create log variables; the existing ln variables are renamed
    foreach stem in lswMP hswMP vaempMP gdppcMP {
        gen `stem'm_`tag'_a = ln(`stem'_ALL_`tag'_wtd )
        ren ln`stem'_ALL_`tag'_wtd ln`stem'm_`tag'_a
    }
    ren lngdpgap_ALL_`tag'_wtd lngdpgap_`tag'_a

    * Create log home and fg variables; lngdpgap shares are only renamed
    foreach stem in lswMP hswMP vaempMP gdppcMP {
        foreach part in shr_h shr_fg shr2_h shr2_fg {
            gen `stem'm_`part'_`tag'_a = ln(`stem'_`part'_`tag'_wtd)
        }
    }
    foreach part in shr_h shr_fg shr2_h shr2_fg {
        ren lngdpgap_`part'_`tag'_wtd lngdpgap_`part'_`tag'_a
    }

    * Create the fixed ratios defined on year 1995 and interact them with the
    * shr2 variables, in terms of the baseline 1995 weights
    if "`weight_category'" == "excluding" {
        * Fall back to maxshare_1995 when maxweight_1995 is not in the data
        cap confirm var maxweight_1995
        if _rc != 0 {
            rename maxshare_1995 maxweight_1995
        }
        foreach vv in lswMPm hswMPm vaempMPm gdppcMPm {
            gen _term_h_`tag' = exp(`vv'_shr2_h_`tag'_a) / exp(`vv'_`tag'_a) * maxweight_1995 if year == 1995
            gen _term_fg_`tag' = exp(`vv'_shr2_fg_`tag'_a) / exp(`vv'_`tag'_a) * (1-maxweight_1995) if year == 1995
            bys BvD : egen term_h_fixed_`tag' = max(_term_h_`tag')
            bys BvD : egen term_fg_fixed_`tag' = max(_term_fg_`tag')
            gen `vv'_shr4_h_`tag'_a = `vv'_shr2_h_`tag'_a * term_h_fixed_`tag'
            gen `vv'_shr4_fg_`tag'_a = `vv'_shr2_fg_`tag'_a * term_fg_fixed_`tag'
            * remove the helper variables before the next variable
            drop term_* _term_*
        }
    }
}
* Save the finished independent vars with the different types of weights
sort BvD year
drop _merge

* Labeling: labelingvarsfinal is called once per variable family
foreach family in lsw hsw vaemp gdppc lngdpgap {
    labelingvarsfinal "`family'"
}

compress
save ${dataset_dir}/indep_vars/bvd_year_indepvars_sharesgdpweighted_`weight_category'_final_${wtype}.dta, replace

}
}
* Report the outcome of the captured block above: _rc is nonzero when any
* command inside it failed.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; captured so a log that is not open raises no error
capture log close dat